{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "code",
      "source": [
        "# 环境\n",
        "!pip install langchain"
      ],
      "metadata": {
        "id": "iKsuAV1ytkuv",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "daed03c3-1d02-4161-bac8-293317d61436"
      },
      "execution_count": 1,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Collecting langchain\n",
            "  Downloading langchain-0.1.13-py3-none-any.whl (810 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m810.5/810.5 kB\u001b[0m \u001b[31m5.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: PyYAML>=5.3 in /usr/local/lib/python3.10/dist-packages (from langchain) (6.0.1)\n",
            "Requirement already satisfied: SQLAlchemy<3,>=1.4 in /usr/local/lib/python3.10/dist-packages (from langchain) (2.0.29)\n",
            "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /usr/local/lib/python3.10/dist-packages (from langchain) (3.9.3)\n",
            "Requirement already satisfied: async-timeout<5.0.0,>=4.0.0 in /usr/local/lib/python3.10/dist-packages (from langchain) (4.0.3)\n",
            "Collecting dataclasses-json<0.7,>=0.5.7 (from langchain)\n",
            "  Downloading dataclasses_json-0.6.4-py3-none-any.whl (28 kB)\n",
            "Collecting jsonpatch<2.0,>=1.33 (from langchain)\n",
            "  Downloading jsonpatch-1.33-py2.py3-none-any.whl (12 kB)\n",
            "Collecting langchain-community<0.1,>=0.0.29 (from langchain)\n",
            "  Downloading langchain_community-0.0.29-py3-none-any.whl (1.8 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.8/1.8 MB\u001b[0m \u001b[31m7.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting langchain-core<0.2.0,>=0.1.33 (from langchain)\n",
            "  Downloading langchain_core-0.1.36-py3-none-any.whl (273 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m273.9/273.9 kB\u001b[0m \u001b[31m4.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting langchain-text-splitters<0.1,>=0.0.1 (from langchain)\n",
            "  Downloading langchain_text_splitters-0.0.1-py3-none-any.whl (21 kB)\n",
            "Collecting langsmith<0.2.0,>=0.1.17 (from langchain)\n",
            "  Downloading langsmith-0.1.38-py3-none-any.whl (86 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.9/86.9 kB\u001b[0m \u001b[31m2.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: numpy<2,>=1 in /usr/local/lib/python3.10/dist-packages (from langchain) (1.25.2)\n",
            "Requirement already satisfied: pydantic<3,>=1 in /usr/local/lib/python3.10/dist-packages (from langchain) (2.6.4)\n",
            "Requirement already satisfied: requests<3,>=2 in /usr/local/lib/python3.10/dist-packages (from langchain) (2.31.0)\n",
            "Requirement already satisfied: tenacity<9.0.0,>=8.1.0 in /usr/local/lib/python3.10/dist-packages (from langchain) (8.2.3)\n",
            "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.3.1)\n",
            "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (23.2.0)\n",
            "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.4.1)\n",
            "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (6.0.5)\n",
            "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.9.4)\n",
            "Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain)\n",
            "  Downloading marshmallow-3.21.1-py3-none-any.whl (49 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.4/49.4 kB\u001b[0m \u001b[31m1.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain)\n",
            "  Downloading typing_inspect-0.9.0-py3-none-any.whl (8.8 kB)\n",
            "Collecting jsonpointer>=1.9 (from jsonpatch<2.0,>=1.33->langchain)\n",
            "  Downloading jsonpointer-2.4-py2.py3-none-any.whl (7.8 kB)\n",
            "Collecting packaging<24.0,>=23.2 (from langchain-core<0.2.0,>=0.1.33->langchain)\n",
            "  Downloading packaging-23.2-py3-none-any.whl (53 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.0/53.0 kB\u001b[0m \u001b[31m3.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting orjson<4.0.0,>=3.9.14 (from langsmith<0.2.0,>=0.1.17->langchain)\n",
            "  Downloading orjson-3.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (144 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m144.8/144.8 kB\u001b[0m \u001b[31m3.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1->langchain) (0.6.0)\n",
            "Requirement already satisfied: pydantic-core==2.16.3 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1->langchain) (2.16.3)\n",
            "Requirement already satisfied: typing-extensions>=4.6.1 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1->langchain) (4.10.0)\n",
            "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain) (3.3.2)\n",
            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain) (3.6)\n",
            "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain) (2.0.7)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain) (2024.2.2)\n",
            "Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.10/dist-packages (from SQLAlchemy<3,>=1.4->langchain) (3.0.3)\n",
            "Collecting mypy-extensions>=0.3.0 (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain)\n",
            "  Downloading mypy_extensions-1.0.0-py3-none-any.whl (4.7 kB)\n",
            "Installing collected packages: packaging, orjson, mypy-extensions, jsonpointer, typing-inspect, marshmallow, jsonpatch, langsmith, dataclasses-json, langchain-core, langchain-text-splitters, langchain-community, langchain\n",
            "  Attempting uninstall: packaging\n",
            "    Found existing installation: packaging 24.0\n",
            "    Uninstalling packaging-24.0:\n",
            "      Successfully uninstalled packaging-24.0\n",
            "Successfully installed dataclasses-json-0.6.4 jsonpatch-1.33 jsonpointer-2.4 langchain-0.1.13 langchain-community-0.0.29 langchain-core-0.1.36 langchain-text-splitters-0.0.1 langsmith-0.1.38 marshmallow-3.21.1 mypy-extensions-1.0.0 orjson-3.10.0 packaging-23.2 typing-inspect-0.9.0\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 2,
      "metadata": {
        "id": "JhjtGN8_rhY_"
      },
      "outputs": [],
      "source": [
        "# 获取环境变量\n",
        "from google.colab import userdata\n",
        "import os\n",
        "ANTHROPIC_API_KEY = userdata.get('ANTHROPIC_API_KEY')\n",
        "os.environ[\"ANTHROPIC_API_KEY\"] = ANTHROPIC_API_KEY\n",
        "VOYAGE_API_KEY = userdata.get('VOYAGE_API_KEY')\n",
        "os.environ[\"VOYAGE_API_KEY\"] = VOYAGE_API_KEY"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# 确保文件存在,搭载云盘\n",
        "import os\n",
        "my_files = '/content/drive/MyDrive/rag/file'\n",
        "for dirname, _, filenames in os.walk(my_files):\n",
        "    for filename in filenames:\n",
        "        print(os.path.join(dirname, filename))"
      ],
      "metadata": {
        "id": "98V_W-NMtf1Q",
        "outputId": "5d145c22-3efc-4af4-a037-27533e46734d",
        "colab": {
          "base_uri": "https://localhost:8080/"
        }
      },
      "execution_count": 19,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "/content/drive/MyDrive/rag/file/TM_AUDIT_LOG.txt\n",
            "/content/drive/MyDrive/rag/file/随手记.txt\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# 下面报错再执行,因为需要重启会话\n",
        "!pip install unstructured"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 1000
        },
        "id": "arnqejPJatAd",
        "outputId": "ad47e399-a662-4ecd-fb88-3451dae8c230"
      },
      "execution_count": 5,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Collecting unstructured\n",
            "  Downloading unstructured-0.12.6-py3-none-any.whl (1.8 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.8/1.8 MB\u001b[0m \u001b[31m8.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting backoff==2.2.1 (from unstructured)\n",
            "  Downloading backoff-2.2.1-py3-none-any.whl (15 kB)\n",
            "Requirement already satisfied: beautifulsoup4==4.12.3 in /usr/local/lib/python3.10/dist-packages (from unstructured) (4.12.3)\n",
            "Requirement already satisfied: certifi==2024.2.2 in /usr/local/lib/python3.10/dist-packages (from unstructured) (2024.2.2)\n",
            "Requirement already satisfied: chardet==5.2.0 in /usr/local/lib/python3.10/dist-packages (from unstructured) (5.2.0)\n",
            "Requirement already satisfied: charset-normalizer==3.3.2 in /usr/local/lib/python3.10/dist-packages (from unstructured) (3.3.2)\n",
            "Requirement already satisfied: click==8.1.7 in /usr/local/lib/python3.10/dist-packages (from unstructured) (8.1.7)\n",
            "Requirement already satisfied: dataclasses-json==0.6.4 in /usr/local/lib/python3.10/dist-packages (from unstructured) (0.6.4)\n",
            "Collecting dataclasses-json-speakeasy==0.5.11 (from unstructured)\n",
            "  Downloading dataclasses_json_speakeasy-0.5.11-py3-none-any.whl (28 kB)\n",
            "Collecting emoji==2.10.1 (from unstructured)\n",
            "  Downloading emoji-2.10.1-py2.py3-none-any.whl (421 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m421.5/421.5 kB\u001b[0m \u001b[31m14.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting filetype==1.2.0 (from unstructured)\n",
            "  Downloading filetype-1.2.0-py2.py3-none-any.whl (19 kB)\n",
            "Requirement already satisfied: idna==3.6 in /usr/local/lib/python3.10/dist-packages (from unstructured) (3.6)\n",
            "Requirement already satisfied: joblib==1.3.2 in /usr/local/lib/python3.10/dist-packages (from unstructured) (1.3.2)\n",
            "Collecting jsonpath-python==1.0.6 (from unstructured)\n",
            "  Downloading jsonpath_python-1.0.6-py3-none-any.whl (7.6 kB)\n",
            "Collecting langdetect==1.0.9 (from unstructured)\n",
            "  Downloading langdetect-1.0.9.tar.gz (981 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m981.5/981.5 kB\u001b[0m \u001b[31m18.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "Collecting lxml==5.1.0 (from unstructured)\n",
            "  Downloading lxml-5.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (8.0 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m8.0/8.0 MB\u001b[0m \u001b[31m35.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting marshmallow==3.20.2 (from unstructured)\n",
            "  Downloading marshmallow-3.20.2-py3-none-any.whl (49 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.4/49.4 kB\u001b[0m \u001b[31m6.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: mypy-extensions==1.0.0 in /usr/local/lib/python3.10/dist-packages (from unstructured) (1.0.0)\n",
            "Requirement already satisfied: nltk==3.8.1 in /usr/local/lib/python3.10/dist-packages (from unstructured) (3.8.1)\n",
            "Collecting numpy==1.26.4 (from unstructured)\n",
            "  Downloading numpy-1.26.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.2 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m18.2/18.2 MB\u001b[0m \u001b[31m52.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: packaging==23.2 in /usr/local/lib/python3.10/dist-packages (from unstructured) (23.2)\n",
            "Requirement already satisfied: python-dateutil==2.8.2 in /usr/local/lib/python3.10/dist-packages (from unstructured) (2.8.2)\n",
            "Collecting python-iso639==2024.2.7 (from unstructured)\n",
            "  Downloading python_iso639-2024.2.7-py3-none-any.whl (274 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m274.7/274.7 kB\u001b[0m \u001b[31m29.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting python-magic==0.4.27 (from unstructured)\n",
            "  Downloading python_magic-0.4.27-py2.py3-none-any.whl (13 kB)\n",
            "Collecting rapidfuzz==3.6.1 (from unstructured)\n",
            "  Downloading rapidfuzz-3.6.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.4 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.4/3.4 MB\u001b[0m \u001b[31m78.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: regex==2023.12.25 in /usr/local/lib/python3.10/dist-packages (from unstructured) (2023.12.25)\n",
            "Requirement already satisfied: requests==2.31.0 in /usr/local/lib/python3.10/dist-packages (from unstructured) (2.31.0)\n",
            "Requirement already satisfied: six==1.16.0 in /usr/local/lib/python3.10/dist-packages (from unstructured) (1.16.0)\n",
            "Requirement already satisfied: soupsieve==2.5 in /usr/local/lib/python3.10/dist-packages (from unstructured) (2.5)\n",
            "Requirement already satisfied: tabulate==0.9.0 in /usr/local/lib/python3.10/dist-packages (from unstructured) (0.9.0)\n",
            "Requirement already satisfied: tqdm==4.66.2 in /usr/local/lib/python3.10/dist-packages (from unstructured) (4.66.2)\n",
            "Collecting typing-extensions==4.9.0 (from unstructured)\n",
            "  Downloading typing_extensions-4.9.0-py3-none-any.whl (32 kB)\n",
            "Requirement already satisfied: typing-inspect==0.9.0 in /usr/local/lib/python3.10/dist-packages (from unstructured) (0.9.0)\n",
            "Collecting unstructured-client==0.18.0 (from unstructured)\n",
            "  Downloading unstructured_client-0.18.0-py3-none-any.whl (21 kB)\n",
            "Collecting urllib3==1.26.18 (from unstructured)\n",
            "  Downloading urllib3-1.26.18-py2.py3-none-any.whl (143 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m143.8/143.8 kB\u001b[0m \u001b[31m16.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting wrapt==1.16.0 (from unstructured)\n",
            "  Downloading wrapt-1.16.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (80 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m80.3/80.3 kB\u001b[0m \u001b[31m6.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hBuilding wheels for collected packages: langdetect\n",
            "  Building wheel for langdetect (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "  Created wheel for langdetect: filename=langdetect-1.0.9-py3-none-any.whl size=993227 sha256=4b2288d7622aadb54a20d5e560f4d41e1ec0f504f9046b7c1ac4fbec8117eef4\n",
            "  Stored in directory: /root/.cache/pip/wheels/95/03/7d/59ea870c70ce4e5a370638b5462a7711ab78fba2f655d05106\n",
            "Successfully built langdetect\n",
            "Installing collected packages: filetype, wrapt, urllib3, typing-extensions, rapidfuzz, python-magic, python-iso639, numpy, marshmallow, lxml, langdetect, jsonpath-python, emoji, backoff, dataclasses-json-speakeasy, unstructured-client, unstructured\n",
            "  Attempting uninstall: wrapt\n",
            "    Found existing installation: wrapt 1.14.1\n",
            "    Uninstalling wrapt-1.14.1:\n",
            "      Successfully uninstalled wrapt-1.14.1\n",
            "  Attempting uninstall: urllib3\n",
            "    Found existing installation: urllib3 2.0.7\n",
            "    Uninstalling urllib3-2.0.7:\n",
            "      Successfully uninstalled urllib3-2.0.7\n",
            "  Attempting uninstall: typing-extensions\n",
            "    Found existing installation: typing_extensions 4.10.0\n",
            "    Uninstalling typing_extensions-4.10.0:\n",
            "      Successfully uninstalled typing_extensions-4.10.0\n",
            "  Attempting uninstall: numpy\n",
            "    Found existing installation: numpy 1.25.2\n",
            "    Uninstalling numpy-1.25.2:\n",
            "      Successfully uninstalled numpy-1.25.2\n",
            "  Attempting uninstall: marshmallow\n",
            "    Found existing installation: marshmallow 3.21.1\n",
            "    Uninstalling marshmallow-3.21.1:\n",
            "      Successfully uninstalled marshmallow-3.21.1\n",
            "  Attempting uninstall: lxml\n",
            "    Found existing installation: lxml 4.9.4\n",
            "    Uninstalling lxml-4.9.4:\n",
            "      Successfully uninstalled lxml-4.9.4\n",
            "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
            "torch 2.2.1+cu121 requires nvidia-cublas-cu12==12.1.3.1; platform_system == \"Linux\" and platform_machine == \"x86_64\", which is not installed.\n",
            "torch 2.2.1+cu121 requires nvidia-cuda-cupti-cu12==12.1.105; platform_system == \"Linux\" and platform_machine == \"x86_64\", which is not installed.\n",
            "torch 2.2.1+cu121 requires nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == \"Linux\" and platform_machine == \"x86_64\", which is not installed.\n",
            "torch 2.2.1+cu121 requires nvidia-cuda-runtime-cu12==12.1.105; platform_system == \"Linux\" and platform_machine == \"x86_64\", which is not installed.\n",
            "torch 2.2.1+cu121 requires nvidia-cudnn-cu12==8.9.2.26; platform_system == \"Linux\" and platform_machine == \"x86_64\", which is not installed.\n",
            "torch 2.2.1+cu121 requires nvidia-cufft-cu12==11.0.2.54; platform_system == \"Linux\" and platform_machine == \"x86_64\", which is not installed.\n",
            "torch 2.2.1+cu121 requires nvidia-curand-cu12==10.3.2.106; platform_system == \"Linux\" and platform_machine == \"x86_64\", which is not installed.\n",
            "torch 2.2.1+cu121 requires nvidia-cusolver-cu12==11.4.5.107; platform_system == \"Linux\" and platform_machine == \"x86_64\", which is not installed.\n",
            "torch 2.2.1+cu121 requires nvidia-cusparse-cu12==12.1.0.106; platform_system == \"Linux\" and platform_machine == \"x86_64\", which is not installed.\n",
            "torch 2.2.1+cu121 requires nvidia-nccl-cu12==2.19.3; platform_system == \"Linux\" and platform_machine == \"x86_64\", which is not installed.\n",
            "torch 2.2.1+cu121 requires nvidia-nvtx-cu12==12.1.105; platform_system == \"Linux\" and platform_machine == \"x86_64\", which is not installed.\n",
            "tensorflow 2.15.0 requires wrapt<1.15,>=1.11.0, but you have wrapt 1.16.0 which is incompatible.\u001b[0m\u001b[31m\n",
            "\u001b[0mSuccessfully installed backoff-2.2.1 dataclasses-json-speakeasy-0.5.11 emoji-2.10.1 filetype-1.2.0 jsonpath-python-1.0.6 langdetect-1.0.9 lxml-5.1.0 marshmallow-3.20.2 numpy-1.26.4 python-iso639-2024.2.7 python-magic-0.4.27 rapidfuzz-3.6.1 typing-extensions-4.9.0 unstructured-0.12.6 unstructured-client-0.18.0 urllib3-1.26.18 wrapt-1.16.0\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.colab-display-data+json": {
              "pip_warning": {
                "packages": [
                  "urllib3"
                ]
              },
              "id": "220a55b7f1bb42748286349fcb0a4e2f"
            }
          },
          "metadata": {}
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# 加载文件夹\n",
        "from langchain_community.document_loaders import DirectoryLoader\n",
        "\n",
        "loader = DirectoryLoader(my_files)\n",
        "docs = loader.load()\n",
        "for doc in docs:\n",
        "  print(doc)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "gJtgfy3quub3",
        "outputId": "c375774f-7b66-4449-89bf-cf7160be9d99"
      },
      "execution_count": 20,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "page_content='审计日志表(TM_AUDIT_LOG)字段含义以及字段名称如下:\\n\\nAUDIT_ID\\tAUDIT_ID\\n\\n机构号\\tORG\\n\\n更新用户\\tUPD_OPER_ID\\n\\n更新时间\\tOPT_DATA\\n\\n交易渠道\\tCHANNEL\\n\\n操作类型\\tOPT_TYPE\\n\\n审计内容\\tAUDIT_CONTEXT\\n\\n主键\\tENTITY_ID\\n\\n实体名\\tENTITY_NAME\\n\\n执行系统\\tEXE_SYSTEM' metadata={'source': '/content/drive/MyDrive/rag/file/TM_AUDIT_LOG.txt'}\n",
            "page_content='1.一拳打破天地障,亿万天魔跨空来 -- 大道争锋\\n\\n2.长路漫漫几个秋，今朝才得青云佑。青云托我瞰江湖，天地方圆一览无。\\n\\n3.曲折途穷天地窄，重重灾劫生死微。身如柳絮随飞扬，无论云泥意贯一。\\n\\n4.母弱出商贾，父强做侍郎。族望留原籍，家贫走他乡。\\n\\n5.有一句话说的好杀人放火金腰带。烧杀劫掠，敲诈勒索，才是魔道中人的风采！\\n\\n6.永生缥缈非我求，长生无为老愧羞。”\\n\\n“界壁消散乱世起，宿命一去竞自由。”\\n\\n“鹰击长空鲸霸海，不试怎知龙与蚯？”\\n\\n“凡夫俗子岂识我，非到末路不甘休！\\n\\n7.溟沧派历代祖师在上，弟子秦墨白敬告：弟子自继得掌门之位以来，为兴山门，每日禅精竭虑，夙兴夜寐，无一刻敢有懈怠，然自祖师开辟山门之后，此界灵机渐乏，以至天人失和，若不奋起，万千载后，则无溟沧派矣；弟子无能，今愿承三代掌门之智，携两殿殿主、门中众真、派外友盟铲断不平，凌驾虚空，另辟新天！此举不为私利，只为内用自足！只为道统延续！只为山门永昌！愿诸代掌门在天相佑。”\\n\\n8.若得紫气来，便寻长生去\\n\\n9.致独行者\\n\\n朔风呼啸日色昏，\\n\\n满目凄凉少行人。\\n\\n忽听长空一声笑，\\n\\n虎豹从来不成群！\\n\\n10.千古地仙随风逝。昔日三王归青冢。\\n\\n阳莽憾陨谁无败？卷土重来再称王。\\n\\n天河一挂淘龙鱼，逆天独行顾八荒。\\n\\n今日暂且展翼去，明朝登仙笞凤凰！\\n\\n11.男儿不展凌云志，空负天生八尺躯！\\n\\n12.“因为困难多壮志，不教红尘惑坚心。今身暂且栖草头，它日狂歌踏山河！”\\n\\n13.山川在理有崩竭，丘壑自古相盈虚\\n\\n14.簌簌衣巾落枣花，村南村北响缲车，牛衣古柳卖黄瓜。酒困路长惟欲睡，日高人渴漫思茶。敲门试问凡人家。\\n\\n15.永生缥缈非我求，长生无为老愧羞。\\n\\n“鹰击长空鲸霸海，不试怎知龙与蚯？”\\n\\n“凡夫俗子岂识我，非到末路不肯休！”\\n\\n16.势大者生，势弱者亡，得势者如乘舟顺流，失势者如赤手渡海，\\n\\n17.看万山红遍，层林尽染；漫江碧透，百舸争流。鹰击长空。鱼翔浅底，万类霜天竞自由。怅寥廓，问苍茫大地，谁主沉浮？”\\n\\n18.他年我若为青帝，报与桃花一处开\\n\\n19.君思仙尽头,我思君安危\\n\\n20.报仇一年都嫌晚，今日我来血屠苏。杀人留名者――宋紫星。\\n\\n21.无我相 无人相 无众生相 无寿者相,红粉骷髅，白骨皮肉\\n\\n22.上蔽天听，下诓朝野\\n\\n23.早岁哪知世事艰，仍许大爱遍人间。\\n\\n一路春风身如絮，命海浮沉结伴行。\\n\\n千慈万悲心渐软，齐心协力铸一剑。\\n\\n今朝剑指叠云处，爱蛊爱人还爱天！\\n\\n24.人们会越发清晰的感觉到，所谓活着的乐趣 在于无拘无束\\u200b\\n\\n25.大爱一生中最大的遗憾之一，便是没有救下百家的这对兄妹，当时背负着古月一族最后期盼心如死灰的大爱流浪到白骨山，意外认识了百花兄妹，小孩子特有的童真可爱，活泼灿烂让还处于灭族悲痛中的大爱心里感受到了一丝温暖，正是这丝温暖，让大爱回忆起了在山寨时的幸福时光，父亲般严厉的舅舅，刀子嘴豆腐心的舅母，总是吵着长大要成为像哥哥一样厉害的蛊师的弟弟，月色下红着脸说着 你这样的家伙，才…才不会喜欢，结婚什么的，还早了一百年呢 的青梅竹马白凝冰，以及最后一刻拼命护住自己的古月一代。那段温柔的时光，在此刻化作了力量，让大爱重新振作了起来，真的是多亏了你们呢，看着眼前的孩子，大爱心中想到，长大了一定会成为了不起的人吧，明明应该是这样的，可是自己再一次，什么都没有做到，什么都没有保护好，看着眼前早已失去呼吸的身体，她们的还只是孩子，她们的人生才刚刚开始，每每想到此处，大爱便痛不欲生，也因此，大爱在心中发下大宏愿，此生将为五域和平而奋斗，创造一个所有人都能够幸福的世界，所有的孩子都可以健康快乐成长的世界，纵死不悔。\\n\\n26.曲折途穷天地窄，重重灾劫生死微。身如柳絮随飞扬，无论云泥意贯一。(身如柳絮风飘去)\\n\\n27.少年轻负剑，玄崖寻仙楼。\\n\\n一朝得闻道，畅然天地游。\\n\\n平生舒快意，狂笔写春秋。\\n\\n长生非我愿，只解心中忧！\\n\\n28.风天语：你对方源大人做了什么，没有方源大人如何抗衡双尊？\\n\\n白凝冰：很简单，我成尊不就是了\\n\\n风天语：？\\n\\n昔日不明生何意，阴阳逆乱种魔心\\n\\n几死道消方知命，不求长生但求敌\\n\\n只身踏入千古局，众生为注仙为棋\\n\\n尝尽诸尊翻云手，从今天地我执白\\u200b\\n\\n29.黑暗是规矩的黑暗，光明是规矩的光明。\\n\\n30.左边：小施勇气，得春夏秋冬禄。\\n\\n右边：大展身手，获东南西北财。\\n\\n中间还有一个横批：时来运转。\\n\\n31.此生就愿成真月，出天山，戏云海，照古今，行走在黑暗的诸天之上。”\\n\\n32.一双两好缠绵久，万转千回缱绻多。\\n\\n细细的，慢慢地，经年累月，把岁月汇聚在一起，有曲折，有翻搓，有纠缠。\\n\\n搓草绳，不就是经历人生吗？\\n\\n33.一生唯谨慎，哪怕是有底牌，只要是胜算小，他也会选择尽量避免交战。\\n\\n他喜欢掌控局面，用各种手段尽量将胜率放大到极限。他最喜欢打的，就是必胜的战斗。\\n\\n只有到了万不得已的时候，他才会冒险激战。\\n\\n因此他常做的事情，就是欺凌弱小，掠夺资源，不断强大。强大到超越原来敌人的程度，再回来找回场子，也就是继续欺凌弱小。\\n\\n这没有什么可耻，那些为了证明自己勇敢，而去主动挑战，冒着生命危险和强敌死磕的，才是真正的蠢货。\\n\\n但偏偏这个价值观一直得到宣扬表彰，这是因为任何的组织，都需要个体的不断牺牲，来维护组织高层的利益。\\n\\n只要想想就知道，生存才是一切活动的前提。\\n\\n为了生存下来，实现心中的理想，才是一个人最大的勇敢。\\n\\n为理想而死，那是蠢货。为理想而苟且偷生的活着，那才是勇士！\\n\\n34.万里江河，苍莽大地，何时才能任我纵横？\\n\\n风云变幻，龙蛇起陆，何时才能睥睨众生？\\n\\n35.死道友不死贫道！\\n\\n36.“红尘漩涡不由己，何朝散发弄扁舟？乘风破浪三万里，方是我辈魔道人！\\n\\n37.就像是电线杆，它伫立在街道上，不招谁也不惹谁，但总会有人不长眼，走路的时候撞到电线杆上。你说这事情，能怪电线杆吗？\\n\\n然而不管是哪个世界，解决争端都不是通过讲道理，而是凭借实力。\\n\\n农夫养的一只公鸡报晓，吵得农夫睡不成懒觉。于是农夫一气之下，就将公鸡宰掉了。这似乎没什么不妥。\\n\\n不管哪个世界，大人物往往只要退让一小步，就能解决矛盾。但最终争端解决，通常是大人物分毫不退，而小人物付出巨大的牺牲。\\n\\n38.骑洋马光荣,挨洋炮可耻\\n\\n39.一看就没好好学习，课本上这个词最早出现在《范进中举》，桑是指桑树，桑树可以养蚕。梓指梓树，梓树的种子可以做蜡烛。古人多在房屋周围种桑梓，所以桑梓代指故乡家乡。\\n\\n40.正所谓最难消受美人恩。\\n\\n41.上士闻道，勤而行之；中士闻道，若存若亡；下士闻道，大笑之。不笑不足以为道。\\n\\n42.远处，青山连绵一片，宛若横卧的巨人，把灰蓝色的苍穹当做被褥盖在身上而酣睡。\\n\\n万里江河，苍莽大地，何时才能任我纵横？\\n\\n风云变幻，龙蛇起陆，何时才能睥睨众生？\\n\\n43.上士无争，下士好争，上德不德，下德执德，执着之者，不名道德。”\\n\\n44.生如蝼蚁 当有鸿鹄之志\\n\\n命比纸薄 应有不屈之心\\n\\n45.上联是：阳间三世，伤天害理皆由你，下联是：阴曹地府，古往今来放过谁，横批：你可来了。\\n\\n46.谎言不会伤人，真相才是快刀\\n\\n47.时来天地皆同力，运去英雄不自由\\n\\n48.二十年来藏剑锋，忽起长歌神鬼惊，大道苍茫斩歧路，笑看沧海听雷音！\\n\\n49.顷刻之间，一道冥冥漠漠，霄雿窅然的玄气透体而出，冲霄而起，一路扶摇直上，竟是生生撞破罡云，一气涌至九重天中！\\n\\n此气捭阖穹宇，其貌混冥，其状若虚，湮云杳渺，无涯无垠，渊渊乎难作言述，泱泱乎无以表形，溢溢洋洋，沛然莫测，充塞于天地之间。\\n\\n50.不堕轮回入大千，心传一道在人间。愿起一剑杀万劫，无情换作有情天！\\n\\n51.此身入道五百载，志气长存星斗移，当啸长歌舒胸臆，云霄一气动天地！”\\n\\n52.足踏不是山，仰首又一巅。”\\n\\n53.红尘漩涡不由己，最难消受美人恩,因为困难多壮志，不教红尘惑艰心,不到末路不肯休，方是吾辈道中人\\n\\n54.一朝踢翻金炉鼎,纵起十万八千云\\n\\n55.此剑直之亦无前，举之亦无上，案之亦无下，运之亦无旁。上法圆天，以顺三光；下法方地，以顺四时；中和民意，以安四乡。\\n\\n56.昌明隆盛之邦，礼仪簪缨之族，花柳繁盛之地，温柔富贵之乡\\n\\n57.上者看弊,下者看利\\n\\n58.有弟逗弟,无弟逗狗,无弟无狗，村头游走\\n\\n59.抬头不见朦胧月，低头不忍思伊人\\n\\n60.八山七水百传流，九路十道天下分，' metadata={'source': '/content/drive/MyDrive/rag/file/随手记.txt'}\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# https://python.langchain.com/docs/use_cases/question_answering/quickstart\n",
        "from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
        "\n",
        "# 切分文档\n",
        "text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)\n",
        "splits = text_splitter.split_documents(docs)\n",
        "for split in splits:\n",
        "  print(split)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "1uoMWwli1BKm",
        "outputId": "89c3d415-4b3d-458f-e7dc-999b068934b2"
      },
      "execution_count": 21,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "page_content='审计日志表(TM_AUDIT_LOG)字段含义以及字段名称如下:\\n\\nAUDIT_ID\\tAUDIT_ID\\n\\n机构号\\tORG\\n\\n更新用户\\tUPD_OPER_ID\\n\\n更新时间\\tOPT_DATA\\n\\n交易渠道\\tCHANNEL\\n\\n操作类型\\tOPT_TYPE\\n\\n审计内容\\tAUDIT_CONTEXT\\n\\n主键\\tENTITY_ID\\n\\n实体名\\tENTITY_NAME\\n\\n执行系统\\tEXE_SYSTEM' metadata={'source': '/content/drive/MyDrive/rag/file/TM_AUDIT_LOG.txt'}\n",
            "page_content='1.一拳打破天地障,亿万天魔跨空来 -- 大道争锋\\n\\n2.长路漫漫几个秋，今朝才得青云佑。青云托我瞰江湖，天地方圆一览无。\\n\\n3.曲折途穷天地窄，重重灾劫生死微。身如柳絮随飞扬，无论云泥意贯一。\\n\\n4.母弱出商贾，父强做侍郎。族望留原籍，家贫走他乡。\\n\\n5.有一句话说的好杀人放火金腰带。烧杀劫掠，敲诈勒索，才是魔道中人的风采！\\n\\n6.永生缥缈非我求，长生无为老愧羞。”\\n\\n“界壁消散乱世起，宿命一去竞自由。”\\n\\n“鹰击长空鲸霸海，不试怎知龙与蚯？”\\n\\n“凡夫俗子岂识我，非到末路不甘休！\\n\\n7.溟沧派历代祖师在上，弟子秦墨白敬告：弟子自继得掌门之位以来，为兴山门，每日禅精竭虑，夙兴夜寐，无一刻敢有懈怠，然自祖师开辟山门之后，此界灵机渐乏，以至天人失和，若不奋起，万千载后，则无溟沧派矣；弟子无能，今愿承三代掌门之智，携两殿殿主、门中众真、派外友盟铲断不平，凌驾虚空，另辟新天！此举不为私利，只为内用自足！只为道统延续！只为山门永昌！愿诸代掌门在天相佑。”\\n\\n8.若得紫气来，便寻长生去\\n\\n9.致独行者\\n\\n朔风呼啸日色昏，\\n\\n满目凄凉少行人。\\n\\n忽听长空一声笑，\\n\\n虎豹从来不成群！\\n\\n10.千古地仙随风逝。昔日三王归青冢。\\n\\n阳莽憾陨谁无败？卷土重来再称王。\\n\\n天河一挂淘龙鱼，逆天独行顾八荒。\\n\\n今日暂且展翼去，明朝登仙笞凤凰！\\n\\n11.男儿不展凌云志，空负天生八尺躯！\\n\\n12.“因为困难多壮志，不教红尘惑坚心。今身暂且栖草头，它日狂歌踏山河！”\\n\\n13.山川在理有崩竭，丘壑自古相盈虚\\n\\n14.簌簌衣巾落枣花，村南村北响缲车，牛衣古柳卖黄瓜。酒困路长惟欲睡，日高人渴漫思茶。敲门试问凡人家。\\n\\n15.永生缥缈非我求，长生无为老愧羞。\\n\\n“鹰击长空鲸霸海，不试怎知龙与蚯？”\\n\\n“凡夫俗子岂识我，非到末路不肯休！”\\n\\n16.势大者生，势弱者亡，得势者如乘舟顺流，失势者如赤手渡海，\\n\\n17.看万山红遍，层林尽染；漫江碧透，百舸争流。鹰击长空。鱼翔浅底，万类霜天竞自由。怅寥廓，问苍茫大地，谁主沉浮？”\\n\\n18.他年我若为青帝，报与桃花一处开\\n\\n19.君思仙尽头,我思君安危\\n\\n20.报仇一年都嫌晚，今日我来血屠苏。杀人留名者――宋紫星。\\n\\n21.无我相 无人相 无众生相 无寿者相,红粉骷髅，白骨皮肉\\n\\n22.上蔽天听，下诓朝野\\n\\n23.早岁哪知世事艰，仍许大爱遍人间。' metadata={'source': '/content/drive/MyDrive/rag/file/随手记.txt'}\n",
            "page_content='17.看万山红遍，层林尽染；漫江碧透，百舸争流。鹰击长空。鱼翔浅底，万类霜天竞自由。怅寥廓，问苍茫大地，谁主沉浮？”\\n\\n18.他年我若为青帝，报与桃花一处开\\n\\n19.君思仙尽头,我思君安危\\n\\n20.报仇一年都嫌晚，今日我来血屠苏。杀人留名者――宋紫星。\\n\\n21.无我相 无人相 无众生相 无寿者相,红粉骷髅，白骨皮肉\\n\\n22.上蔽天听，下诓朝野\\n\\n23.早岁哪知世事艰，仍许大爱遍人间。\\n\\n一路春风身如絮，命海浮沉结伴行。\\n\\n千慈万悲心渐软，齐心协力铸一剑。\\n\\n今朝剑指叠云处，爱蛊爱人还爱天！\\n\\n24.人们会越发清晰的感觉到，所谓活着的乐趣 在于无拘无束\\u200b\\n\\n25.大爱一生中最大的遗憾之一，便是没有救下百家的这对兄妹，当时背负着古月一族最后期盼心如死灰的大爱流浪到白骨山，意外认识了百花兄妹，小孩子特有的童真可爱，活泼灿烂让还处于灭族悲痛中的大爱心里感受到了一丝温暖，正是这丝温暖，让大爱回忆起了在山寨时的幸福时光，父亲般严厉的舅舅，刀子嘴豆腐心的舅母，总是吵着长大要成为像哥哥一样厉害的蛊师的弟弟，月色下红着脸说着 你这样的家伙，才…才不会喜欢，结婚什么的，还早了一百年呢 的青梅竹马白凝冰，以及最后一刻拼命护住自己的古月一代。那段温柔的时光，在此刻化作了力量，让大爱重新振作了起来，真的是多亏了你们呢，看着眼前的孩子，大爱心中想到，长大了一定会成为了不起的人吧，明明应该是这样的，可是自己再一次，什么都没有做到，什么都没有保护好，看着眼前早已失去呼吸的身体，她们的还只是孩子，她们的人生才刚刚开始，每每想到此处，大爱便痛不欲生，也因此，大爱在心中发下大宏愿，此生将为五域和平而奋斗，创造一个所有人都能够幸福的世界，所有的孩子都可以健康快乐成长的世界，纵死不悔。\\n\\n26.曲折途穷天地窄，重重灾劫生死微。身如柳絮随飞扬，无论云泥意贯一。(身如柳絮风飘去)\\n\\n27.少年轻负剑，玄崖寻仙楼。\\n\\n一朝得闻道，畅然天地游。\\n\\n平生舒快意，狂笔写春秋。\\n\\n长生非我愿，只解心中忧！\\n\\n28.风天语：你对方源大人做了什么，没有方源大人如何抗衡双尊？\\n\\n白凝冰：很简单，我成尊不就是了\\n\\n风天语：？\\n\\n昔日不明生何意，阴阳逆乱种魔心\\n\\n几死道消方知命，不求长生但求敌\\n\\n只身踏入千古局，众生为注仙为棋\\n\\n尝尽诸尊翻云手，从今天地我执白\\u200b\\n\\n29.黑暗是规矩的黑暗，光明是规矩的光明。' metadata={'source': '/content/drive/MyDrive/rag/file/随手记.txt'}\n",
            "page_content='一朝得闻道，畅然天地游。\\n\\n平生舒快意，狂笔写春秋。\\n\\n长生非我愿，只解心中忧！\\n\\n28.风天语：你对方源大人做了什么，没有方源大人如何抗衡双尊？\\n\\n白凝冰：很简单，我成尊不就是了\\n\\n风天语：？\\n\\n昔日不明生何意，阴阳逆乱种魔心\\n\\n几死道消方知命，不求长生但求敌\\n\\n只身踏入千古局，众生为注仙为棋\\n\\n尝尽诸尊翻云手，从今天地我执白\\u200b\\n\\n29.黑暗是规矩的黑暗，光明是规矩的光明。\\n\\n30.左边：小施勇气，得春夏秋冬禄。\\n\\n右边：大展身手，获东南西北财。\\n\\n中间还有一个横批：时来运转。\\n\\n31.此生就愿成真月，出天山，戏云海，照古今，行走在黑暗的诸天之上。”\\n\\n32.一双两好缠绵久，万转千回缱绻多。\\n\\n细细的，慢慢地，经年累月，把岁月汇聚在一起，有曲折，有翻搓，有纠缠。\\n\\n搓草绳，不就是经历人生吗？\\n\\n33.一生唯谨慎，哪怕是有底牌，只要是胜算小，他也会选择尽量避免交战。\\n\\n他喜欢掌控局面，用各种手段尽量将胜率放大到极限。他最喜欢打的，就是必胜的战斗。\\n\\n只有到了万不得已的时候，他才会冒险激战。\\n\\n因此他常做的事情，就是欺凌弱小，掠夺资源，不断强大。强大到超越原来敌人的程度，再回来找回场子，也就是继续欺凌弱小。\\n\\n这没有什么可耻，那些为了证明自己勇敢，而去主动挑战，冒着生命危险和强敌死磕的，才是真正的蠢货。\\n\\n但偏偏这个价值观一直得到宣扬表彰，这是因为任何的组织，都需要个体的不断牺牲，来维护组织高层的利益。\\n\\n只要想想就知道，生存才是一切活动的前提。\\n\\n为了生存下来，实现心中的理想，才是一个人最大的勇敢。\\n\\n为理想而死，那是蠢货。为理想而苟且偷生的活着，那才是勇士！\\n\\n34.万里江河，苍莽大地，何时才能任我纵横？\\n\\n风云变幻，龙蛇起陆，何时才能睥睨众生？\\n\\n35.死道友不死贫道！\\n\\n36.“红尘漩涡不由己，何朝散发弄扁舟？乘风破浪三万里，方是我辈魔道人！\\n\\n37.就像是电线杆，它伫立在街道上，不招谁也不惹谁，但总会有人不长眼，走路的时候撞到电线杆上。你说这事情，能怪电线杆吗？\\n\\n然而不管是哪个世界，解决争端都不是通过讲道理，而是凭借实力。\\n\\n农夫养的一只公鸡报晓，吵得农夫睡不成懒觉。于是农夫一气之下，就将公鸡宰掉了。这似乎没什么不妥。\\n\\n不管哪个世界，大人物往往只要退让一小步，就能解决矛盾。但最终争端解决，通常是大人物分毫不退，而小人物付出巨大的牺牲。' metadata={'source': '/content/drive/MyDrive/rag/file/随手记.txt'}\n",
            "page_content='然而不管是哪个世界，解决争端都不是通过讲道理，而是凭借实力。\\n\\n农夫养的一只公鸡报晓，吵得农夫睡不成懒觉。于是农夫一气之下，就将公鸡宰掉了。这似乎没什么不妥。\\n\\n不管哪个世界，大人物往往只要退让一小步，就能解决矛盾。但最终争端解决，通常是大人物分毫不退，而小人物付出巨大的牺牲。\\n\\n38.骑洋马光荣,挨洋炮可耻\\n\\n39.一看就没好好学习，课本上这个词最早出现在《范进中举》，桑是指桑树，桑树可以养蚕。梓指梓树，梓树的种子可以做蜡烛。古人多在房屋周围种桑梓，所以桑梓代指故乡家乡。\\n\\n40.正所谓最难消受美人恩。\\n\\n41.上士闻道，勤而行之；中士闻道，若存若亡；下士闻道，大笑之。不笑不足以为道。\\n\\n42.远处，青山连绵一片，宛若横卧的巨人，把灰蓝色的苍穹当做被褥盖在身上而酣睡。\\n\\n万里江河，苍莽大地，何时才能任我纵横？\\n\\n风云变幻，龙蛇起陆，何时才能睥睨众生？\\n\\n43.上士无争，下士好争，上德不德，下德执德，执着之者，不名道德。”\\n\\n44.生如蝼蚁 当有鸿鹄之志\\n\\n命比纸薄 应有不屈之心\\n\\n45.上联是：阳间三世，伤天害理皆由你，下联是：阴曹地府，古往今来放过谁，横批：你可来了。\\n\\n46.谎言不会伤人，真相才是快刀\\n\\n47.时来天地皆同力，运去英雄不自由\\n\\n48.二十年来藏剑锋，忽起长歌神鬼惊，大道苍茫斩歧路，笑看沧海听雷音！\\n\\n49.顷刻之间，一道冥冥漠漠，霄雿窅然的玄气透体而出，冲霄而起，一路扶摇直上，竟是生生撞破罡云，一气涌至九重天中！\\n\\n此气捭阖穹宇，其貌混冥，其状若虚，湮云杳渺，无涯无垠，渊渊乎难作言述，泱泱乎无以表形，溢溢洋洋，沛然莫测，充塞于天地之间。\\n\\n50.不堕轮回入大千，心传一道在人间。愿起一剑杀万劫，无情换作有情天！\\n\\n51.此身入道五百载，志气长存星斗移，当啸长歌舒胸臆，云霄一气动天地！”\\n\\n52.足踏不是山，仰首又一巅。”\\n\\n53.红尘漩涡不由己，最难消受美人恩,因为困难多壮志，不教红尘惑艰心,不到末路不肯休，方是吾辈道中人\\n\\n54.一朝踢翻金炉鼎,纵起十万八千云\\n\\n55.此剑直之亦无前，举之亦无上，案之亦无下，运之亦无旁。上法圆天，以顺三光；下法方地，以顺四时；中和民意，以安四乡。\\n\\n56.昌明隆盛之邦，礼仪簪缨之族，花柳繁盛之地，温柔富贵之乡\\n\\n57.上者看弊,下者看利\\n\\n58.有弟逗弟,无弟逗狗,无弟无狗，村头游走' metadata={'source': '/content/drive/MyDrive/rag/file/随手记.txt'}\n",
            "page_content='54.一朝踢翻金炉鼎,纵起十万八千云\\n\\n55.此剑直之亦无前，举之亦无上，案之亦无下，运之亦无旁。上法圆天，以顺三光；下法方地，以顺四时；中和民意，以安四乡。\\n\\n56.昌明隆盛之邦，礼仪簪缨之族，花柳繁盛之地，温柔富贵之乡\\n\\n57.上者看弊,下者看利\\n\\n58.有弟逗弟,无弟逗狗,无弟无狗，村头游走\\n\\n59.抬头不见朦胧月，低头不忍思伊人\\n\\n60.八山七水百传流，九路十道天下分，' metadata={'source': '/content/drive/MyDrive/rag/file/随手记.txt'}\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "!pip install -qU langchain-anthropic\n",
        "!pip install -U voyageai\n",
        "!pip install chromadb\n",
        "!pip install langchainhub"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "kq6fs3ddaKB-",
        "outputId": "3de3de51-4ca3-4cfa-997f-9537f96ec253"
      },
      "execution_count": 8,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Requirement already satisfied: voyageai in /usr/local/lib/python3.10/dist-packages (0.2.1)\n",
            "Requirement already satisfied: aiohttp<4.0,>=3.5 in /usr/local/lib/python3.10/dist-packages (from voyageai) (3.9.3)\n",
            "Requirement already satisfied: aiolimiter<2.0.0,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from voyageai) (1.1.0)\n",
            "Requirement already satisfied: numpy>=1.11 in /usr/local/lib/python3.10/dist-packages (from voyageai) (1.26.4)\n",
            "Requirement already satisfied: requests<3.0,>=2.20 in /usr/local/lib/python3.10/dist-packages (from voyageai) (2.31.0)\n",
            "Requirement already satisfied: tenacity>=8.0.1 in /usr/local/lib/python3.10/dist-packages (from voyageai) (8.2.3)\n",
            "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0,>=3.5->voyageai) (1.3.1)\n",
            "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0,>=3.5->voyageai) (23.2.0)\n",
            "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0,>=3.5->voyageai) (1.4.1)\n",
            "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0,>=3.5->voyageai) (6.0.5)\n",
            "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0,>=3.5->voyageai) (1.9.4)\n",
            "Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0,>=3.5->voyageai) (4.0.3)\n",
            "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3.0,>=2.20->voyageai) (3.3.2)\n",
            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3.0,>=2.20->voyageai) (3.6)\n",
            "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3.0,>=2.20->voyageai) (1.26.18)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3.0,>=2.20->voyageai) (2024.2.2)\n",
            "Requirement already satisfied: chromadb in /usr/local/lib/python3.10/dist-packages (0.4.24)\n",
            "Requirement already satisfied: build>=1.0.3 in /usr/local/lib/python3.10/dist-packages (from chromadb) (1.2.1)\n",
            "Requirement already satisfied: requests>=2.28 in /usr/local/lib/python3.10/dist-packages (from chromadb) (2.31.0)\n",
            "Requirement already satisfied: pydantic>=1.9 in /usr/local/lib/python3.10/dist-packages (from chromadb) (2.6.4)\n",
            "Requirement already satisfied: chroma-hnswlib==0.7.3 in /usr/local/lib/python3.10/dist-packages (from chromadb) (0.7.3)\n",
            "Requirement already satisfied: fastapi>=0.95.2 in /usr/local/lib/python3.10/dist-packages (from chromadb) (0.110.0)\n",
            "Requirement already satisfied: uvicorn[standard]>=0.18.3 in /usr/local/lib/python3.10/dist-packages (from chromadb) (0.29.0)\n",
            "Requirement already satisfied: numpy>=1.22.5 in /usr/local/lib/python3.10/dist-packages (from chromadb) (1.26.4)\n",
            "Requirement already satisfied: posthog>=2.4.0 in /usr/local/lib/python3.10/dist-packages (from chromadb) (3.5.0)\n",
            "Requirement already satisfied: typing-extensions>=4.5.0 in /usr/local/lib/python3.10/dist-packages (from chromadb) (4.9.0)\n",
            "Requirement already satisfied: pulsar-client>=3.1.0 in /usr/local/lib/python3.10/dist-packages (from chromadb) (3.4.0)\n",
            "Requirement already satisfied: onnxruntime>=1.14.1 in /usr/local/lib/python3.10/dist-packages (from chromadb) (1.17.1)\n",
            "Requirement already satisfied: opentelemetry-api>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from chromadb) (1.24.0)\n",
            "Requirement already satisfied: opentelemetry-exporter-otlp-proto-grpc>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from chromadb) (1.24.0)\n",
            "Requirement already satisfied: opentelemetry-instrumentation-fastapi>=0.41b0 in /usr/local/lib/python3.10/dist-packages (from chromadb) (0.45b0)\n",
            "Requirement already satisfied: opentelemetry-sdk>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from chromadb) (1.24.0)\n",
            "Requirement already satisfied: tokenizers>=0.13.2 in /usr/local/lib/python3.10/dist-packages (from chromadb) (0.15.2)\n",
            "Requirement already satisfied: pypika>=0.48.9 in /usr/local/lib/python3.10/dist-packages (from chromadb) (0.48.9)\n",
            "Requirement already satisfied: tqdm>=4.65.0 in /usr/local/lib/python3.10/dist-packages (from chromadb) (4.66.2)\n",
            "Requirement already satisfied: overrides>=7.3.1 in /usr/local/lib/python3.10/dist-packages (from chromadb) (7.7.0)\n",
            "Requirement already satisfied: importlib-resources in /usr/local/lib/python3.10/dist-packages (from chromadb) (6.4.0)\n",
            "Requirement already satisfied: grpcio>=1.58.0 in /usr/local/lib/python3.10/dist-packages (from chromadb) (1.62.1)\n",
            "Requirement already satisfied: bcrypt>=4.0.1 in /usr/local/lib/python3.10/dist-packages (from chromadb) (4.1.2)\n",
            "Requirement already satisfied: typer>=0.9.0 in /usr/local/lib/python3.10/dist-packages (from chromadb) (0.9.4)\n",
            "Requirement already satisfied: kubernetes>=28.1.0 in /usr/local/lib/python3.10/dist-packages (from chromadb) (29.0.0)\n",
            "Requirement already satisfied: tenacity>=8.2.3 in /usr/local/lib/python3.10/dist-packages (from chromadb) (8.2.3)\n",
            "Requirement already satisfied: PyYAML>=6.0.0 in /usr/local/lib/python3.10/dist-packages (from chromadb) (6.0.1)\n",
            "Requirement already satisfied: mmh3>=4.0.1 in /usr/local/lib/python3.10/dist-packages (from chromadb) (4.1.0)\n",
            "Requirement already satisfied: orjson>=3.9.12 in /usr/local/lib/python3.10/dist-packages (from chromadb) (3.10.0)\n",
            "Requirement already satisfied: packaging>=19.1 in /usr/local/lib/python3.10/dist-packages (from build>=1.0.3->chromadb) (23.2)\n",
            "Requirement already satisfied: pyproject_hooks in /usr/local/lib/python3.10/dist-packages (from build>=1.0.3->chromadb) (1.0.0)\n",
            "Requirement already satisfied: tomli>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from build>=1.0.3->chromadb) (2.0.1)\n",
            "Requirement already satisfied: starlette<0.37.0,>=0.36.3 in /usr/local/lib/python3.10/dist-packages (from fastapi>=0.95.2->chromadb) (0.36.3)\n",
            "Requirement already satisfied: certifi>=14.05.14 in /usr/local/lib/python3.10/dist-packages (from kubernetes>=28.1.0->chromadb) (2024.2.2)\n",
            "Requirement already satisfied: six>=1.9.0 in /usr/local/lib/python3.10/dist-packages (from kubernetes>=28.1.0->chromadb) (1.16.0)\n",
            "Requirement already satisfied: python-dateutil>=2.5.3 in /usr/local/lib/python3.10/dist-packages (from kubernetes>=28.1.0->chromadb) (2.8.2)\n",
            "Requirement already satisfied: google-auth>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from kubernetes>=28.1.0->chromadb) (2.27.0)\n",
            "Requirement already satisfied: websocket-client!=0.40.0,!=0.41.*,!=0.42.*,>=0.32.0 in /usr/local/lib/python3.10/dist-packages (from kubernetes>=28.1.0->chromadb) (1.7.0)\n",
            "Requirement already satisfied: requests-oauthlib in /usr/local/lib/python3.10/dist-packages (from kubernetes>=28.1.0->chromadb) (1.4.1)\n",
            "Requirement already satisfied: oauthlib>=3.2.2 in /usr/local/lib/python3.10/dist-packages (from kubernetes>=28.1.0->chromadb) (3.2.2)\n",
            "Requirement already satisfied: urllib3>=1.24.2 in /usr/local/lib/python3.10/dist-packages (from kubernetes>=28.1.0->chromadb) (1.26.18)\n",
            "Requirement already satisfied: coloredlogs in /usr/local/lib/python3.10/dist-packages (from onnxruntime>=1.14.1->chromadb) (15.0.1)\n",
            "Requirement already satisfied: flatbuffers in /usr/local/lib/python3.10/dist-packages (from onnxruntime>=1.14.1->chromadb) (24.3.25)\n",
            "Requirement already satisfied: protobuf in /usr/local/lib/python3.10/dist-packages (from onnxruntime>=1.14.1->chromadb) (3.20.3)\n",
            "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from onnxruntime>=1.14.1->chromadb) (1.12)\n",
            "Requirement already satisfied: deprecated>=1.2.6 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-api>=1.2.0->chromadb) (1.2.14)\n",
            "Requirement already satisfied: importlib-metadata<=7.0,>=6.0 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-api>=1.2.0->chromadb) (7.0.0)\n",
            "Requirement already satisfied: googleapis-common-protos~=1.52 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb) (1.63.0)\n",
            "Requirement already satisfied: opentelemetry-exporter-otlp-proto-common==1.24.0 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb) (1.24.0)\n",
            "Requirement already satisfied: opentelemetry-proto==1.24.0 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb) (1.24.0)\n",
            "Requirement already satisfied: opentelemetry-instrumentation-asgi==0.45b0 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-instrumentation-fastapi>=0.41b0->chromadb) (0.45b0)\n",
            "Requirement already satisfied: opentelemetry-instrumentation==0.45b0 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-instrumentation-fastapi>=0.41b0->chromadb) (0.45b0)\n",
            "Requirement already satisfied: opentelemetry-semantic-conventions==0.45b0 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-instrumentation-fastapi>=0.41b0->chromadb) (0.45b0)\n",
            "Requirement already satisfied: opentelemetry-util-http==0.45b0 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-instrumentation-fastapi>=0.41b0->chromadb) (0.45b0)\n",
            "Requirement already satisfied: setuptools>=16.0 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-instrumentation==0.45b0->opentelemetry-instrumentation-fastapi>=0.41b0->chromadb) (67.7.2)\n",
            "Requirement already satisfied: wrapt<2.0.0,>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-instrumentation==0.45b0->opentelemetry-instrumentation-fastapi>=0.41b0->chromadb) (1.16.0)\n",
            "Requirement already satisfied: asgiref~=3.0 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-instrumentation-asgi==0.45b0->opentelemetry-instrumentation-fastapi>=0.41b0->chromadb) (3.8.1)\n",
            "Requirement already satisfied: monotonic>=1.5 in /usr/local/lib/python3.10/dist-packages (from posthog>=2.4.0->chromadb) (1.6)\n",
            "Requirement already satisfied: backoff>=1.10.0 in /usr/local/lib/python3.10/dist-packages (from posthog>=2.4.0->chromadb) (2.2.1)\n",
            "Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic>=1.9->chromadb) (0.6.0)\n",
            "Requirement already satisfied: pydantic-core==2.16.3 in /usr/local/lib/python3.10/dist-packages (from pydantic>=1.9->chromadb) (2.16.3)\n",
            "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.28->chromadb) (3.3.2)\n",
            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.28->chromadb) (3.6)\n",
            "Requirement already satisfied: huggingface_hub<1.0,>=0.16.4 in /usr/local/lib/python3.10/dist-packages (from tokenizers>=0.13.2->chromadb) (0.20.3)\n",
            "Requirement already satisfied: click<9.0.0,>=7.1.1 in /usr/local/lib/python3.10/dist-packages (from typer>=0.9.0->chromadb) (8.1.7)\n",
            "Requirement already satisfied: h11>=0.8 in /usr/local/lib/python3.10/dist-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.14.0)\n",
            "Requirement already satisfied: httptools>=0.5.0 in /usr/local/lib/python3.10/dist-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.6.1)\n",
            "Requirement already satisfied: python-dotenv>=0.13 in /usr/local/lib/python3.10/dist-packages (from uvicorn[standard]>=0.18.3->chromadb) (1.0.1)\n",
            "Requirement already satisfied: uvloop!=0.15.0,!=0.15.1,>=0.14.0 in /usr/local/lib/python3.10/dist-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.19.0)\n",
            "Requirement already satisfied: watchfiles>=0.13 in /usr/local/lib/python3.10/dist-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.21.0)\n",
            "Requirement already satisfied: websockets>=10.4 in /usr/local/lib/python3.10/dist-packages (from uvicorn[standard]>=0.18.3->chromadb) (12.0)\n",
            "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from google-auth>=1.0.1->kubernetes>=28.1.0->chromadb) (5.3.3)\n",
            "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from google-auth>=1.0.1->kubernetes>=28.1.0->chromadb) (0.4.0)\n",
            "Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.10/dist-packages (from google-auth>=1.0.1->kubernetes>=28.1.0->chromadb) (4.9)\n",
            "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface_hub<1.0,>=0.16.4->tokenizers>=0.13.2->chromadb) (3.13.3)\n",
            "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface_hub<1.0,>=0.16.4->tokenizers>=0.13.2->chromadb) (2023.6.0)\n",
            "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.10/dist-packages (from importlib-metadata<=7.0,>=6.0->opentelemetry-api>=1.2.0->chromadb) (3.18.1)\n",
            "Requirement already satisfied: anyio<5,>=3.4.0 in /usr/local/lib/python3.10/dist-packages (from starlette<0.37.0,>=0.36.3->fastapi>=0.95.2->chromadb) (3.7.1)\n",
            "Requirement already satisfied: humanfriendly>=9.1 in /usr/local/lib/python3.10/dist-packages (from coloredlogs->onnxruntime>=1.14.1->chromadb) (10.0)\n",
            "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->onnxruntime>=1.14.1->chromadb) (1.3.0)\n",
            "Requirement already satisfied: sniffio>=1.1 in /usr/local/lib/python3.10/dist-packages (from anyio<5,>=3.4.0->starlette<0.37.0,>=0.36.3->fastapi>=0.95.2->chromadb) (1.3.1)\n",
            "Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio<5,>=3.4.0->starlette<0.37.0,>=0.36.3->fastapi>=0.95.2->chromadb) (1.2.0)\n",
            "Requirement already satisfied: pyasn1<0.7.0,>=0.4.6 in /usr/local/lib/python3.10/dist-packages (from pyasn1-modules>=0.2.1->google-auth>=1.0.1->kubernetes>=28.1.0->chromadb) (0.6.0)\n",
            "Collecting langchainhub\n",
            "  Downloading langchainhub-0.1.15-py3-none-any.whl (4.6 kB)\n",
            "Requirement already satisfied: requests<3,>=2 in /usr/local/lib/python3.10/dist-packages (from langchainhub) (2.31.0)\n",
            "Collecting types-requests<3.0.0.0,>=2.31.0.2 (from langchainhub)\n",
            "  Downloading types_requests-2.31.0.20240311-py3-none-any.whl (14 kB)\n",
            "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchainhub) (3.3.2)\n",
            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchainhub) (3.6)\n",
            "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchainhub) (1.26.18)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchainhub) (2024.2.2)\n",
            "Collecting urllib3<3,>=1.21.1 (from requests<3,>=2->langchainhub)\n",
            "  Downloading urllib3-2.2.1-py3-none-any.whl (121 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m121.1/121.1 kB\u001b[0m \u001b[31m4.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hInstalling collected packages: urllib3, types-requests, langchainhub\n",
            "  Attempting uninstall: urllib3\n",
            "    Found existing installation: urllib3 1.26.18\n",
            "    Uninstalling urllib3-1.26.18:\n",
            "      Successfully uninstalled urllib3-1.26.18\n",
            "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
            "tensorflow 2.15.0 requires wrapt<1.15,>=1.11.0, but you have wrapt 1.16.0 which is incompatible.\n",
            "unstructured 0.12.6 requires urllib3==1.26.18, but you have urllib3 2.2.1 which is incompatible.\u001b[0m\u001b[31m\n",
            "\u001b[0mSuccessfully installed langchainhub-0.1.15 types-requests-2.31.0.20240311 urllib3-2.2.1\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# 检索器\n",
        "import voyageai\n",
        "from langchain_community.vectorstores import Chroma\n",
        "from langchain_community.embeddings import VoyageEmbeddings\n",
        "\n",
        "voyage = VoyageEmbeddings(model=\"voyage-2\")\n",
        "\n",
        "vectorstore = Chroma.from_documents(documents=splits, embedding=voyage)\n",
        "retriever = vectorstore.as_retriever()\n",
        "retriever"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "NP-eh8zn7Khu",
        "outputId": "15d9b525-a612-40a5-aeef-f097ac771a62"
      },
      "execution_count": 22,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "VectorStoreRetriever(tags=['Chroma', 'VoyageEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x7bd60eae7ac0>)"
            ]
          },
          "metadata": {},
          "execution_count": 22
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "from langchain import hub\n",
        "prompt = hub.pull(\"rlm/rag-prompt\")\n",
        "prompt"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "-AjyvDzVZAo8",
        "outputId": "125f3460-c6c4-4283-d18a-16f12caa4827"
      },
      "execution_count": 10,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "ChatPromptTemplate(input_variables=['context', 'question'], metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template=\"You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\\nQuestion: {question} \\nContext: {context} \\nAnswer:\"))])"
            ]
          },
          "metadata": {},
          "execution_count": 10
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "from langchain_core.output_parsers import StrOutputParser\n",
        "from langchain_core.prompts import PromptTemplate\n",
        "from langchain_anthropic import ChatAnthropic\n",
        "from langchain_core.output_parsers import StrOutputParser\n",
        "from langchain_core.runnables import RunnablePassthrough\n",
        "# 模型\n",
        "llm = ChatAnthropic(temperature=0,model_name=\"claude-3-opus-20240229\")\n",
        "\n",
        "def format_docs(docs):\n",
        "    return \"\\n\\n\".join(doc.page_content for doc in docs)\n",
        "\n",
        "\n",
        "# 修改之后的prompt模板\n",
        "prompt = PromptTemplate.from_template(\"\"\"\n",
        "根据文本回答问题:\n",
        "{context}\n",
        "问题:\n",
        "{question}\n",
        "找不到就请回答:\"不知道\"\n",
        "\"\"\")\n",
        "# chain\n",
        "my_chain =( {\"context\": retriever | format_docs, \"question\": RunnablePassthrough()}\n",
        "          | prompt\n",
        "          | llm\n",
        "          | StrOutputParser()\n",
        ")\n"
      ],
      "metadata": {
        "id": "5glG-RdG9y4k"
      },
      "execution_count": 26,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "question=\"风天语：你对方源大人做了什么，没有方源大人如何抗衡双尊？白凝冰怎么回答的?\"\n",
        "print(my_chain.invoke(question))\n",
        "\n",
        "question2=\"审计日志表帮我写一个查询所有更新时间的db2的sql\"\n",
        "print(my_chain.invoke(question2))\n",
        "\n",
        "question3=\"日耳曼名族知道吗?\"\n",
        "my_chain.invoke(question3)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 107
        },
        "id": "ij-dDlFCgTV8",
        "outputId": "320549a4-ac33-4236-d81f-4769384132ce"
      },
      "execution_count": 27,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "白凝冰回答:\"很简单,我成尊不就是了\"。\n",
            "根据审计日志表(TM_AUDIT_LOG)的字段信息,更新时间对应的字段名称是OPT_DATA,所以查询所有更新时间的DB2 SQL语句如下:\n",
            "\n",
            "SELECT OPT_DATA FROM TM_AUDIT_LOG;\n"
          ]
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "'不知道'"
            ],
            "application/vnd.google.colaboratory.intrinsic+json": {
              "type": "string"
            }
          },
          "metadata": {},
          "execution_count": 27
        }
      ]
    }
  ]
}